library(ncdf4)
library(car)
Loading required package: carData
## Precipitación
apcp <- nc_open("../../data/train/apcp_sfc_latlon_subset_19940101_20071231.nc")
## Flujo de radiación, onda larga
dlwrf <- nc_open("../../data/train/dlwrf_sfc_latlon_subset_19940101_20071231.nc")
## Flujo de radiación, onda corta
dswrf <- nc_open("../../data/train/dswrf_sfc_latlon_subset_19940101_20071231.nc")
## Presión atmosférica
pres_msl <- nc_open("../../data/train/pres_msl_latlon_subset_19940101_20071231.nc")
## Agua precipitable
pwat <- nc_open("../../data/train/pwat_eatm_latlon_subset_19940101_20071231.nc")
## Humedad específica, 2 metros sobre el nivel del mar
spfh <- nc_open("../../data/train/spfh_2m_latlon_subset_19940101_20071231.nc")
## Porcentaje de cielo nublado
tcdc <- nc_open("../../data/train/tcdc_eatm_latlon_subset_19940101_20071231.nc")
## Nivel de condensación
tcolc <- nc_open("../../data/train/tcolc_eatm_latlon_subset_19940101_20071231.nc")
## Temperatura máxma
tmax <- nc_open("../../data/train/tmax_2m_latlon_subset_19940101_20071231.nc")
## Temperatura mínima
tmin <- nc_open("../../data/train/tmin_2m_latlon_subset_19940101_20071231.nc")
## Temperatura actual, dos metros sobre la superficie
tmp.2m <- nc_open("../../data/train/tmp_2m_latlon_subset_19940101_20071231.nc")
## Temperatura en la superficie
tmp.sfc <-nc_open("../../data/train/tmp_sfc_latlon_subset_19940101_20071231.nc")
## Radiación onda larga en la superficie
ulwrf.sfc <- nc_open("../../data/train/ulwrf_sfc_latlon_subset_19940101_20071231.nc")
## Radiación onda larga en la atmosfera
ulwrf.atm <- nc_open("../../data/train/ulwrf_tatm_latlon_subset_19940101_20071231.nc")
## Radiación onda corta en la superficie
uswrf <- nc_open("../../data/train/uswrf_sfc_latlon_subset_19940101_20071231.nc")
stations <- read.csv("../../data/station_info.csv")
energy_train <- read.csv("../../data/train.csv")
Todos los predictores tienen el mismo formato:
3 variables:
Cada variable tiene 5 dimensiones:
print(time)
[1] 1700568 1700592 1700616 1700640 1700664 1700688 1700712 1700736 1700760 1700784 1700808
[12] 1700832 1700856 1700880 1700904 1700928 1700952 1700976 1701000 1701024 1701048 1701072
[23] 1701096 1701120 1701144 1701168 1701192 1701216 1701240 1701264 1701288 1701312 1701336
[34] 1701360 1701384 1701408 1701432 1701456 1701480 1701504 1701528 1701552 1701576 1701600
[45] 1701624 1701648 1701672 1701696 1701720 1701744 1701768 1701792 1701816 1701840 1701864
[56] 1701888 1701912 1701936 1701960 1701984 1702008 1702032 1702056 1702080 1702104 1702128
[67] 1702152 1702176 1702200 1702224 1702248 1702272 1702296 1702320 1702344 1702368 1702392
[78] 1702416 1702440 1702464 1702488 1702512 1702536 1702560 1702584 1702608 1702632 1702656
[89] 1702680 1702704 1702728 1702752 1702776 1702800 1702824 1702848 1702872 1702896 1702920
[100] 1702944 1702968 1702992 1703016 1703040 1703064 1703088 1703112 1703136 1703160 1703184
[111] 1703208 1703232 1703256 1703280 1703304 1703328 1703352 1703376 1703400 1703424 1703448
[122] 1703472 1703496 1703520 1703544 1703568 1703592 1703616 1703640 1703664 1703688 1703712
[133] 1703736 1703760 1703784 1703808 1703832 1703856 1703880 1703904 1703928 1703952 1703976
[144] 1704000 1704024 1704048 1704072 1704096 1704120 1704144 1704168 1704192 1704216 1704240
[155] 1704264 1704288 1704312 1704336 1704360 1704384 1704408 1704432 1704456 1704480 1704504
[166] 1704528 1704552 1704576 1704600 1704624 1704648 1704672 1704696 1704720 1704744 1704768
[177] 1704792 1704816 1704840 1704864 1704888 1704912 1704936 1704960 1704984 1705008 1705032
[188] 1705056 1705080 1705104 1705128 1705152 1705176 1705200 1705224 1705248 1705272 1705296
[199] 1705320 1705344 1705368 1705392 1705416 1705440 1705464 1705488 1705512 1705536 1705560
[210] 1705584 1705608 1705632 1705656 1705680 1705704 1705728 1705752 1705776 1705800 1705824
[221] 1705848 1705872 1705896 1705920 1705944 1705968 1705992 1706016 1706040 1706064 1706088
[232] 1706112 1706136 1706160 1706184 1706208 1706232 1706256 1706280 1706304 1706328 1706352
[243] 1706376 1706400 1706424 1706448 1706472 1706496 1706520 1706544 1706568 1706592 1706616
[254] 1706640 1706664 1706688 1706712 1706736 1706760 1706784 1706808 1706832 1706856 1706880
[265] 1706904 1706928 1706952 1706976 1707000 1707024 1707048 1707072 1707096 1707120 1707144
[276] 1707168 1707192 1707216 1707240 1707264 1707288 1707312 1707336 1707360 1707384 1707408
[287] 1707432 1707456 1707480 1707504 1707528 1707552 1707576 1707600 1707624 1707648 1707672
[298] 1707696 1707720 1707744 1707768 1707792 1707816 1707840 1707864 1707888 1707912 1707936
[309] 1707960 1707984 1708008 1708032 1708056 1708080 1708104 1708128 1708152 1708176 1708200
[320] 1708224 1708248 1708272 1708296 1708320 1708344 1708368 1708392 1708416 1708440 1708464
[331] 1708488 1708512 1708536 1708560 1708584 1708608 1708632 1708656 1708680 1708704 1708728
[342] 1708752 1708776 1708800 1708824 1708848 1708872 1708896 1708920 1708944 1708968 1708992
[353] 1709016 1709040 1709064 1709088 1709112 1709136 1709160 1709184 1709208 1709232 1709256
[364] 1709280 1709304 1709328 1709352 1709376 1709400 1709424 1709448 1709472 1709496 1709520
[375] 1709544 1709568 1709592 1709616 1709640 1709664 1709688 1709712 1709736 1709760 1709784
[386] 1709808 1709832 1709856 1709880 1709904 1709928 1709952 1709976 1710000 1710024 1710048
[397] 1710072 1710096 1710120 1710144 1710168 1710192 1710216 1710240 1710264 1710288 1710312
[408] 1710336 1710360 1710384 1710408 1710432 1710456 1710480 1710504 1710528 1710552 1710576
[419] 1710600 1710624 1710648 1710672 1710696 1710720 1710744 1710768 1710792 1710816 1710840
[430] 1710864 1710888 1710912 1710936 1710960 1710984 1711008 1711032 1711056 1711080 1711104
[441] 1711128 1711152 1711176 1711200 1711224 1711248 1711272 1711296 1711320 1711344 1711368
[452] 1711392 1711416 1711440 1711464 1711488 1711512 1711536 1711560 1711584 1711608 1711632
[463] 1711656 1711680 1711704 1711728 1711752 1711776 1711800 1711824 1711848 1711872 1711896
[474] 1711920 1711944 1711968 1711992 1712016 1712040 1712064 1712088 1712112 1712136 1712160
[485] 1712184 1712208 1712232 1712256 1712280 1712304 1712328 1712352 1712376 1712400 1712424
[496] 1712448 1712472 1712496 1712520 1712544 1712568 1712592 1712616 1712640 1712664 1712688
[507] 1712712 1712736 1712760 1712784 1712808 1712832 1712856 1712880 1712904 1712928 1712952
[518] 1712976 1713000 1713024 1713048 1713072 1713096 1713120 1713144 1713168 1713192 1713216
[529] 1713240 1713264 1713288 1713312 1713336 1713360 1713384 1713408 1713432 1713456 1713480
[540] 1713504 1713528 1713552 1713576 1713600 1713624 1713648 1713672 1713696 1713720 1713744
[551] 1713768 1713792 1713816 1713840 1713864 1713888 1713912 1713936 1713960 1713984 1714008
[562] 1714032 1714056 1714080 1714104 1714128 1714152 1714176 1714200 1714224 1714248 1714272
[573] 1714296 1714320 1714344 1714368 1714392 1714416 1714440 1714464 1714488 1714512 1714536
[584] 1714560 1714584 1714608 1714632 1714656 1714680 1714704 1714728 1714752 1714776 1714800
[595] 1714824 1714848 1714872 1714896 1714920 1714944 1714968 1714992 1715016 1715040 1715064
[606] 1715088 1715112 1715136 1715160 1715184 1715208 1715232 1715256 1715280 1715304 1715328
[617] 1715352 1715376 1715400 1715424 1715448 1715472 1715496 1715520 1715544 1715568 1715592
[628] 1715616 1715640 1715664 1715688 1715712 1715736 1715760 1715784 1715808 1715832 1715856
[639] 1715880 1715904 1715928 1715952 1715976 1716000 1716024 1716048 1716072 1716096 1716120
[650] 1716144 1716168 1716192 1716216 1716240 1716264 1716288 1716312 1716336 1716360 1716384
[661] 1716408 1716432 1716456 1716480 1716504 1716528 1716552 1716576 1716600 1716624 1716648
[672] 1716672 1716696 1716720 1716744 1716768 1716792 1716816 1716840 1716864 1716888 1716912
[683] 1716936 1716960 1716984 1717008 1717032 1717056 1717080 1717104 1717128 1717152 1717176
[694] 1717200 1717224 1717248 1717272 1717296 1717320 1717344 1717368 1717392 1717416 1717440
[705] 1717464 1717488 1717512 1717536 1717560 1717584 1717608 1717632 1717656 1717680 1717704
[716] 1717728 1717752 1717776 1717800 1717824 1717848 1717872 1717896 1717920 1717944 1717968
[727] 1717992 1718016 1718040 1718064 1718088 1718112 1718136 1718160 1718184 1718208 1718232
[738] 1718256 1718280 1718304 1718328 1718352 1718376 1718400 1718424 1718448 1718472 1718496
[749] 1718520 1718544 1718568 1718592 1718616 1718640 1718664 1718688 1718712 1718736 1718760
[760] 1718784 1718808 1718832 1718856 1718880 1718904 1718928 1718952 1718976 1719000 1719024
[771] 1719048 1719072 1719096 1719120 1719144 1719168 1719192 1719216 1719240 1719264 1719288
[782] 1719312 1719336 1719360 1719384 1719408 1719432 1719456 1719480 1719504 1719528 1719552
[793] 1719576 1719600 1719624 1719648 1719672 1719696 1719720 1719744 1719768 1719792 1719816
[804] 1719840 1719864 1719888 1719912 1719936 1719960 1719984 1720008 1720032 1720056 1720080
[815] 1720104 1720128 1720152 1720176 1720200 1720224 1720248 1720272 1720296 1720320 1720344
[826] 1720368 1720392 1720416 1720440 1720464 1720488 1720512 1720536 1720560 1720584 1720608
[837] 1720632 1720656 1720680 1720704 1720728 1720752 1720776 1720800 1720824 1720848 1720872
[848] 1720896 1720920 1720944 1720968 1720992 1721016 1721040 1721064 1721088 1721112 1721136
[859] 1721160 1721184 1721208 1721232 1721256 1721280 1721304 1721328 1721352 1721376 1721400
[870] 1721424 1721448 1721472 1721496 1721520 1721544 1721568 1721592 1721616 1721640 1721664
[881] 1721688 1721712 1721736 1721760 1721784 1721808 1721832 1721856 1721880 1721904 1721928
[892] 1721952 1721976 1722000 1722024 1722048 1722072 1722096 1722120 1722144 1722168 1722192
[903] 1722216 1722240 1722264 1722288 1722312 1722336 1722360 1722384 1722408 1722432 1722456
[914] 1722480 1722504 1722528 1722552 1722576 1722600 1722624 1722648 1722672 1722696 1722720
[925] 1722744 1722768 1722792 1722816 1722840 1722864 1722888 1722912 1722936 1722960 1722984
[936] 1723008 1723032 1723056 1723080 1723104 1723128 1723152 1723176 1723200 1723224 1723248
[947] 1723272 1723296 1723320 1723344 1723368 1723392 1723416 1723440 1723464 1723488 1723512
[958] 1723536 1723560 1723584 1723608 1723632 1723656 1723680 1723704 1723728 1723752 1723776
[969] 1723800 1723824 1723848 1723872 1723896 1723920 1723944 1723968 1723992 1724016 1724040
[980] 1724064 1724088 1724112 1724136 1724160 1724184 1724208 1724232 1724256 1724280 1724304
[991] 1724328 1724352 1724376 1724400 1724424 1724448 1724472 1724496 1724520 1724544
[ reached getOption("max.print") -- omitted 4113 entries ]
Cada variable tiene exactamente las mismas cinco dimensiones. Del resumen impreso arriba podemos rescatar los siguientes valores importantes:
| Dimensión | Tamaño | Valor Mínimo | Valor Máximo |
|---|---|---|---|
| Latitud | 9 | 31 | 39 |
| Longitud | 16 | 254 | 269 |
| Ensemble | 11 | 1 | 11 |
| fHour | 5 | 12 | 24 |
| Tiempo | 5113 | 1700586 | 1823256 |
Representan valores en la zona geográfica de interés.
En esta área, solo toman los siguientes valores:
Mapa de las estaciones meteorológicas y los sitios de medición
En estos datos, la longitud está en grados positivos desde el meridiano cero, llegando hasta 360°. Por otro lado, las coordenadas de las estaciones meteorológicas tienen este dato en valores que van de 0° a 180°, ya sea al Este u Oeste.
head(stations)
Esto será tomado en cuenta al etiquetar la longitud en los datos de entrenamiento.
# Primero obtener las variables de interés
apcp.data <- ncvar_get(apcp, "Total_precipitation")
dlwrf.data <- ncvar_get(dlwrf, "Downward_Long-Wave_Rad_Flux")
dswrf.data <- ncvar_get(dswrf, "Downward_Short-Wave_Rad_Flux")
pres_msl.data <- ncvar_get(pres_msl, "Pressure")
pwat.data <- ncvar_get(pwat, "Precipitable_water")
spfh.data <- ncvar_get(spfh, "Specific_humidity_height_above_ground")
tcdc.data <- ncvar_get(tcdc, "Total_cloud_cover")
tcolc.data <- ncvar_get(tcolc, "Total_Column-Integrated_Condensate")
tmax.data <- ncvar_get(tmax, "Maximum_temperature")
tmin.data <- ncvar_get(tmin, "Minimum_temperature")
tmp2m.data <- ncvar_get(tmp.2m, "Temperature_height_above_ground")
tmpsfc.data <- ncvar_get(tmp.sfc, "Temperature_surface")
ulwrfsfc.data <- ncvar_get(ulwrf.sfc, "Upward_Long-Wave_Rad_Flux_surface")
ulwrfatm.data <- ncvar_get(ulwrf.atm, "Upward_Long-Wave_Rad_Flux")
uswrf.data <- ncvar_get(uswrf, "Upward_Short-Wave_Rad_Flux")
print(dimnames(apcp.data))
NULL
apcp.data <- nameCols(apcp.data)
dlwrf.data <- nameCols(dlwrf.data)
dswrf.data <- nameCols(dswrf.data)
pres_msl.data <- nameCols(pres_msl.data)
pwat.data <- nameCols(pwat.data)
spfh.data <- nameCols(spfh.data)
tcdc.data <- nameCols(tcdc.data)
tcolc.data <- nameCols(tcolc.data)
tmax.data <- nameCols(tmax.data)
tmin.data <- nameCols(tmin.data)
tmp2m.data <- nameCols(tmp2m.data)
tmpsfc.data <- nameCols(tmpsfc.data)
ulwrfsfc.data <- nameCols(ulwrfsfc.data)
ulwrfatm.data <- nameCols(ulwrfatm.data)
uswrf.data <- nameCols(uswrf.data)
# Muestra de como queda etiquetado
#print(dlwrf.data[,,,,])
De acuerdo con el glosario de la agencia que provee el conjunto de datos, un ensemble se define como:
Colección de modelos numéricos que muestran resultados ligeramente diferentes.
e1 <- dlwrf.data[,,,1,]
e2 <- dlwrf.data[,,,2,]
e3 <- dlwrf.data[,,,3,]
e4 <- dlwrf.data[,,,4,]
e5 <- dlwrf.data[,,,5,]
comparativo <- c(e1[1,1,1,1], e2[1,1,1,1], e3[1,1,1,1], e4[1,1,1,1], e5[1,1,1,1])
print(comparativo)
[1] 247.0190 246.0190 247.5191 248.0089 247.2623
El primer ensemble es el de control, y es el que será utilizado para este modelo.
apcp.data.e1 <- apcp.data[,,,1,]
dlwrf.data.e1 <- dlwrf.data[,,,1,]
dswrf.data.e1 <- dswrf.data[,,,1,]
pres_msl.data.e1 <- pres_msl.data[,,,1,]
pwat.data.e1 <- pwat.data[,,,1,]
spfh.data.e1 <- spfh.data[,,,1,]
tcdc.data.e1 <- tcdc.data[,,,1,]
tcolc.data.e1 <- tcolc.data[,,,1,]
tmax.data.e1 <- tmax.data[,,,1,]
tmin.data.e1 <- tmin.data[,,,1,]
tmp2m.data.e1 <- tmp2m.data[,,,1,]
tmpsfc.data.e1 <- tmpsfc.data[,,,1,]
ulwrfsfc.data.e1 <- ulwrfsfc.data[,,,1,]
ulwrfatm.data.e1 <- ulwrfatm.data[,,,1,]
uswrf.data.e1 <- uswrf.data[,,,1,]
dim(dlwrf.data.e1)
[1] 16 9 5 5113
Es la hora a la cual la estación meteorológica hace su pronóstico sobre la variable de interés. Esto sucede cinco veces en el día: Empieza a las 12, y se repite en intervalos de 3 horas, terminando a las 24.
#dimnames(dlwrf.data.e1)[[3]] <- namedSeq("fhour_", 24, steps=3, start=12)
#dimnames(dswrf.data.e1)[[3]] <- namedSeq("fhour_", 24, steps=3, start=12)
#print(dlwrf.data.e1[1,1,,1])
Es la fecha en la que se tomó la medición de la variable. Esta columna corresponde directamente con una de las respuestas en los datos de entrenamiento.
Sus valores son obtenidos de la variable time contenida
en el conjunto de datos.
# Todas las variables manejan el mismo tiempo
#time <- ncvar_get(dlwrf, "time")
# Diferente formato
#head(time)
#head(energy_train$Date)
Comparando la variable time con las fechas de los datos
de entrenamiento, se observa que su formato no coincide. Cada una está
representando su tiempo de forma distinta:
| Variable | Formato |
|---|---|
time |
Horas desde 1800-01-01 00:00:00 |
energy_train$Date |
Fecha formato YYYYMMDD |
Por lo tanto es necesario convertir nuestro tiempo en horas, a fecha.
#dates <- as.Date(as.POSIXct(time*3600, origin='1800-01-01 00:00:00', 'UTC'))
#dates <- gsub('-', '', dates)
# Además, energy_train$Date tiene sus fechas como ints
#dates <- strtoi(dates)
#head(dates)
#head(energy_train$Date)
Con estos valores, se pueden etiquetar nuestros predictores:
#dimnames(dlwrf.data.e1)[[4]] <- dates
#dimnames(dswrf.data.e1)[[4]] <- dates
#head(dlwrf.data.e1[1,1,1,])
En este primer experimento se hará un modelo solo para el sitio de generación ACME.
Para identificar los datos que son útiles para este sitio particular se utilizan las coordenadas:
print(names(stations))
[1] "stid" "nlat" "elon" "elev"
acme <- stations[stations["stid"] == "ACME"]
print(acme)
[1] "ACME" "34.80833" " -98.02325" " 397"
# Se redondea porque los archivos de los predictores tienen coordenadas enteras.
# Se tomará la medición más cercana.
acme.lat <- as.character(round(as.numeric(acme[2])))
acme.lon <- as.character(round(as.numeric(acme[3])))
print(c(acme.lat, acme.lon))
[1] "35" "-98"
Se determinó que las coordenadas más cercanas a ACME son: (35, -98).
Con estos datos extraemos las mediciones correspondientes.
acme.apcp <- apcp.data.e1[acme.lon, acme.lat, ,]
acme.dlwrf <- dlwrf.data.e1[acme.lon, acme.lat, ,]
acme.dswrf <- dswrf.data.e1[acme.lon, acme.lat, ,]
acme.pres_msl <- pres_msl.data.e1[acme.lon, acme.lat, ,]
acme.pwat <- pwat.data.e1[acme.lon, acme.lat, ,]
acme.spfh <- spfh.data.e1[acme.lon, acme.lat, ,]
acme.tcdc <- tcdc.data.e1[acme.lon, acme.lat, ,]
acme.tcolc <- tcolc.data.e1[acme.lon, acme.lat, ,]
acme.tmax <- tmax.data.e1[acme.lon, acme.lat, ,]
acme.tmin <- tmin.data.e1[acme.lon, acme.lat, ,]
acme.tmp2m <- tmp2m.data.e1[acme.lon, acme.lat, ,]
acme.tmpsfc <- tmpsfc.data.e1[acme.lon, acme.lat, ,]
acme.ulwrfsfc <- ulwrfsfc.data.e1[acme.lon, acme.lat, ,]
acme.ulwrfatm <- ulwrfatm.data.e1[acme.lon, acme.lat, ,]
acme.uswrf <- uswrf.data.e1[acme.lon, acme.lat, ,]
# Para simplificar este primer experimento, se decidió promediar las predicciones
# a lo largo del día
acme.apcp.means <- colMeans(acme.apcp)
acme.dlwrf.means <- colMeans(acme.dlwrf)
acme.dswrf.means <- colMeans(acme.dswrf)
acme.pres_msl.means <- colMeans(acme.pres_msl)
acme.pwat.means <- colMeans(acme.pwat)
acme.spfh.means <- colMeans(acme.spfh)
acme.tcdc.means <- colMeans(acme.tcdc)
acme.tcolc.means <- colMeans(acme.tcolc)
acme.tmax.means <- colMeans(acme.tmax)
acme.tmin.means <- colMeans(acme.tmin)
acme.tmp2m.means <- colMeans(acme.tmp2m)
acme.tmpsfc.means <- colMeans(acme.tmpsfc)
acme.ulwrfsfc.means <- colMeans(acme.ulwrfsfc)
acme.ulwrfatm.means <- colMeans(acme.ulwrfatm)
acme.uswrf.means <- colMeans(acme.uswrf)
acme.final <- data.frame(
Date = names(acme.dlwrf.means),
apcp = acme.apcp.means,
dlwrf = acme.dlwrf.means,
dswrf = acme.dswrf.means,
pres_msl = acme.pres_msl.means,
pwat = acme.pwat.means,
spfh = acme.spfh.means,
tcdc = acme.tcdc.means,
tcolc = acme.tcolc.means,
tmax = acme.tmax.means,
tmin = acme.tmin.means,
tmp2m = acme.tmp2m.means,
tmpsfc = acme.tmpsfc.means,
ulwrfsfc = acme.ulwrfsfc.means,
ulwrfatm = acme.ulwrfatm.means,
uswrf = acme.uswrf.means
)
print(acme.final)
Ya que se tienen los predictores, solo hace falta conocer cuánta
energía se generó en ACME con esos valores de dlwrf y
dswrf.
energy.acme <- energy_train[c("Date", "ACME")]
colnames(energy.acme) <- c("Date", "ACME")
# Hay que tener el mismo formato para combinarlo con nuestros predictores
energy.acme <- transform(energy.acme, Date=as.character(Date))
print(energy.acme)
standarized <- data.frame(scale(acme.final[-1]))
standarized$Date <- acme.final$Date
print(standarized)
NA
Y se combina con los predictores en un solo Dataframe para poder crear modelos.
energy.acme <- merge(energy.acme, standarized, by="Date")
print(names(energy.acme))
[1] "Date" "ACME" "apcp" "dlwrf" "dswrf" "pres_msl" "pwat" "spfh"
[9] "tcdc" "tcolc" "tmax" "tmin" "tmp2m" "tmpsfc" "ulwrfsfc" "ulwrfatm"
[17] "uswrf"
print(energy.acme)
sapply(energy.acme, max)
Date ACME apcp dlwrf
"20071231" "31347900" "17.1056882908331" "1.83686020832212"
dswrf pres_msl pwat spfh
"1.58382764172784" "3.92983998006173" "3.03949728936212" "2.60530505654799"
tcdc tcolc tmax tmin
"11.1400558389533" "11.1385546737371" "1.8228865226015" "1.77741740362991"
tmp2m tmpsfc ulwrfsfc ulwrfatm
"1.79220381183537" "1.78718974754215" "2.01964761831877" "1.94835844566315"
uswrf
"5.16075977202968"
cor(energy.acme[, -1])
ACME apcp dlwrf dswrf pres_msl pwat spfh
ACME 1.0000000 -0.2840400586 0.3491611 0.8778619 -0.22947616 0.2553372 0.43595520
apcp -0.2840401 1.0000000000 0.2203777 -0.2807715 -0.11086159 0.2960540 0.18355915
dlwrf 0.3491611 0.2203777294 1.0000000 0.4338425 -0.54851462 0.9309265 0.94527859
dswrf 0.8778619 -0.2807714954 0.4338425 1.0000000 -0.27955383 0.3628511 0.52713656
pres_msl -0.2294762 -0.1108615897 -0.5485146 -0.2795538 1.00000000 -0.4084331 -0.52940651
pwat 0.2553372 0.2960540131 0.9309265 0.3628511 -0.40843311 1.0000000 0.91677752
spfh 0.4359552 0.1835591499 0.9452786 0.5271366 -0.52940651 0.9167775 1.00000000
tcdc -0.4650994 0.7285235818 0.1557405 -0.5047511 -0.03909404 0.2023303 0.04074563
tcolc -0.4645678 0.7286818411 0.1557277 -0.5043353 -0.03910877 0.2024014 0.04086606
tmax 0.6303386 -0.0008306776 0.8854478 0.6939541 -0.57883521 0.7734407 0.87645530
tmin 0.5822438 0.0450384282 0.9200566 0.6485997 -0.58874096 0.8129532 0.90190697
tmp2m 0.6291758 0.0053514732 0.8948689 0.6939023 -0.57967297 0.7850970 0.88663755
tmpsfc 0.6463814 -0.0089508039 0.8886781 0.7192815 -0.56014920 0.7831707 0.88277237
ulwrfsfc 0.6356217 -0.0015016159 0.8971830 0.7087908 -0.54943239 0.7949466 0.88742749
ulwrfatm 0.6512972 -0.3985206332 0.2718145 0.6526874 -0.22785866 0.1481589 0.34283083
uswrf 0.8212341 -0.3278286473 0.2525889 0.9210409 -0.13646296 0.2074573 0.36971417
tcdc tcolc tmax tmin tmp2m tmpsfc
ACME -0.46509936 -0.46456783 0.6303386448 0.58224382 0.629175849 0.646381419
apcp 0.72852358 0.72868184 -0.0008306776 0.04503843 0.005351473 -0.008950804
dlwrf 0.15574051 0.15572770 0.8854477788 0.92005664 0.894868866 0.888678124
dswrf -0.50475105 -0.50433528 0.6939541371 0.64859966 0.693902261 0.719281548
pres_msl -0.03909404 -0.03910877 -0.5788352053 -0.58874096 -0.579672966 -0.560149202
pwat 0.20233032 0.20240137 0.7734406650 0.81295320 0.785096983 0.783170715
spfh 0.04074563 0.04086606 0.8764553000 0.90190697 0.886637554 0.882772372
tcdc 1.00000000 0.99995623 -0.1521365636 -0.09206669 -0.140820060 -0.158556827
tcolc 0.99995623 1.00000000 -0.1518762606 -0.09183883 -0.140573381 -0.158302189
tmax -0.15213656 -0.15187626 1.0000000000 0.99331884 0.998615508 0.995845682
tmin -0.09206669 -0.09183883 0.9933188355 1.00000000 0.994865498 0.990414611
tmp2m -0.14082006 -0.14057338 0.9986155075 0.99486550 1.000000000 0.997329176
tmpsfc -0.15855683 -0.15830219 0.9958456821 0.99041461 0.997329176 1.000000000
ulwrfsfc -0.15195658 -0.15169159 0.9945109861 0.99161561 0.994554449 0.997264962
ulwrfatm -0.67011208 -0.66971579 0.5489579379 0.50186737 0.538280699 0.547584885
uswrf -0.56287509 -0.56242547 0.5172900016 0.46617622 0.514927729 0.536286585
ulwrfsfc ulwrfatm uswrf
ACME 0.635621652 0.6512972 0.8212341
apcp -0.001501616 -0.3985206 -0.3278286
dlwrf 0.897183021 0.2718145 0.2525889
dswrf 0.708790754 0.6526874 0.9210409
pres_msl -0.549432393 -0.2278587 -0.1364630
pwat 0.794946617 0.1481589 0.2074573
spfh 0.887427493 0.3428308 0.3697142
tcdc -0.151956579 -0.6701121 -0.5628751
tcolc -0.151691588 -0.6697158 -0.5624255
tmax 0.994510986 0.5489579 0.5172900
tmin 0.991615608 0.5018674 0.4661762
tmp2m 0.994554449 0.5382807 0.5149277
tmpsfc 0.997264962 0.5475849 0.5362866
ulwrfsfc 1.000000000 0.5452354 0.5349272
ulwrfatm 0.545235398 1.0000000 0.6390446
uswrf 0.534927176 0.6390446 1.0000000
plot(energy.acme$ulwrfsfc, energy.acme$tmax)
lmm.fit <- lm(ACME ~ poly(dlwrf, 2) + poly(dswrf, 3) + ulwrfatm + tmax +
tmpsfc + pwat, data=energy.acme)
plot(lmm.fit)
print(vif(lmm.fit))
GVIF Df GVIF^(1/(2*Df))
poly(dlwrf, 2) 36.884387 2 2.464397
poly(dswrf, 3) 6.847564 3 1.378022
ulwrfatm 2.285942 1 1.511933
tmax 161.632988 1 12.713496
tmpsfc 211.511798 1 14.543445
pwat 12.405863 1 3.522196
summary(lmm.fit)
Call:
lm(formula = ACME ~ poly(dlwrf, 2) + poly(dswrf, 3) + ulwrfatm +
tmax + tmpsfc + pwat, data = energy.acme)
Residuals:
Min 1Q Median 3Q Max
-21072236 -1381404 450963 1876483 19444052
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 16877462 46821 360.471 < 2e-16 ***
poly(dlwrf, 2)1 -113663874 17606981 -6.456 1.18e-10 ***
poly(dlwrf, 2)2 81608052 4045841 20.171 < 2e-16 ***
poly(dswrf, 3)1 370690935 7361687 50.354 < 2e-16 ***
poly(dswrf, 3)2 68247238 3866372 17.651 < 2e-16 ***
poly(dswrf, 3)3 -14482782 3527912 -4.105 4.10e-05 ***
ulwrfatm 244157 70797 3.449 0.000568 ***
tmax 1904855 595312 3.200 0.001384 **
tmpsfc 2333044 681000 3.426 0.000618 ***
pwat -2233862 164928 -13.545 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3348000 on 5103 degrees of freedom
Multiple R-squared: 0.8193, Adjusted R-squared: 0.819
F-statistic: 2571 on 9 and 5103 DF, p-value: < 2.2e-16
Best yet R^2: 0.8193 RSE: 3 348 000 Formula: ACME ~ poly(dlwrf, 2) + poly(dswrf, 3) + ulwrfatm + tmax + tmpsfc + pwat
Mean ACME: 16 877 462 RSE ~ 19.83%
mean(energy.acme$ACME)
[1] 16877462
adax <- stations[stations["stid"] == "ADAX"]
# Se redondea porque los archivos de los predictores tienen coordenadas enteras.
# Se tomará la medición más cercana.
adax.lat <- as.character(round(as.numeric(acme[2])))
adax.lon <- as.character(round(as.numeric(acme[3])))
print(c(adax.lat, adax.lon))
[1] "35" "-98"
# Primero obtener las variables de interés
apcp.test.data <- ncvar_get(apcp, "Total_precipitation")
dlwrf.test.data <- ncvar_get(dlwrf, "Downward_Long-Wave_Rad_Flux")
dswrf.test.data <- ncvar_get(dswrf, "Downward_Short-Wave_Rad_Flux")
pres_msl.test.data <- ncvar_get(pres_msl, "Pressure")
pwat.test.data <- ncvar_get(pwat, "Precipitable_water")
spfh.test.data <- ncvar_get(spfh, "Specific_humidity_height_above_ground")
tcdc.test.data <- ncvar_get(tcdc, "Total_cloud_cover")
tcolc.test.data <- ncvar_get(tcolc, "Total_Column-Integrated_Condensate")
tmax.test.data <- ncvar_get(tmax, "Maximum_temperature")
tmin.test.data <- ncvar_get(tmin, "Minimum_temperature")
tmp2m.test.data <- ncvar_get(tmp.2m, "Temperature_height_above_ground")
tmpsfc.test.data <- ncvar_get(tmp.sfc, "Temperature_surface")
ulwrfsfc.test.data <- ncvar_get(ulwrf.sfc, "Upward_Long-Wave_Rad_Flux_surface")
ulwrfatm.test.data <- ncvar_get(ulwrf.atm, "Upward_Long-Wave_Rad_Flux")
uswrf.test.data <- ncvar_get(uswrf, "Upward_Short-Wave_Rad_Flux")
apcp.test.data <- nameCols(apcp.test.data)
dlwrf.test.data <- nameCols(dlwrf.test.data)
dswrf.test.data <- nameCols(dswrf.test.data)
pres_msl.test.data <- nameCols(pres_msl.test.data)
pwat.test.data <- nameCols(pwat.test.data)
spfh.test.data <- nameCols(spfh.test.data)
tcdc.test.data <- nameCols(tcdc.test.data)
tcolc.test.data <- nameCols(tcolc.test.data)
tmax.test.data <- nameCols(tmax.test.data)
tmin.test.data <- nameCols(tmin.test.data)
tmp2m.test.data <- nameCols(tmp2m.test.data)
tmpsfc.test.data <- nameCols(tmpsfc.test.data)
ulwrfsfc.test.data <- nameCols(ulwrfsfc.test.data)
ulwrfatm.test.data <- nameCols(ulwrfatm.test.data)
uswrf.test.data <- nameCols(uswrf.test.data)
apcp.test.data.e1 <- apcp.test.data[,,,1,]
dlwrf.test.data.e1 <- dlwrf.test.data[,,,1,]
dswrf.test.data.e1 <- dswrf.test.data[,,,1,]
pres_msl.test.data.e1 <- pres_msl.test.data[,,,1,]
pwat.test.data.e1 <- pwat.test.data[,,,1,]
spfh.test.data.e1 <- spfh.test.data[,,,1,]
tcdc.test.data.e1 <- tcdc.test.data[,,,1,]
tcolc.test.data.e1 <- tcolc.test.data[,,,1,]
tmax.test.data.e1 <- tmax.test.data[,,,1,]
tmin.test.data.e1 <- tmin.test.data[,,,1,]
tmp2m.test.data.e1 <- tmp2m.test.data[,,,1,]
tmpsfc.test.data.e1 <- tmpsfc.test.data[,,,1,]
ulwrfsfc.test.data.e1 <- ulwrfsfc.test.data[,,,1,]
ulwrfatm.test.data.e1 <- ulwrfatm.test.data[,,,1,]
uswrf.test.data.e1 <- uswrf.test.data[,,,1,]
dim(dlwrf.data.e1)
[1] 16 9 5 5113
adax.test.apcp <- apcp.test.data.e1[adax.lon, adax.lat, ,]
adax.test.dlwrf <- dlwrf.test.data.e1[adax.lon, adax.lat, ,]
adax.test.dswrf <- dswrf.test.data.e1[adax.lon, adax.lat, ,]
adax.test.pres_msl <- pres_msl.test.data.e1[adax.lon, adax.lat, ,]
adax.test.pwat <- pwat.test.data.e1[adax.lon, adax.lat, ,]
adax.test.spfh <- spfh.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tcdc <- tcdc.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tcolc <- tcolc.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tmax <- tmax.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tmin <- tmin.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tmp2m <- tmp2m.test.data.e1[adax.lon, adax.lat, ,]
adax.test.tmpsfc <- tmpsfc.test.data.e1[adax.lon, adax.lat, ,]
adax.test.ulwrfsfc <- ulwrfsfc.test.data.e1[adax.lon, adax.lat, ,]
adax.test.ulwrfatm <- ulwrfatm.test.data.e1[adax.lon, adax.lat, ,]
adax.test.uswrf <- uswrf.test.data.e1[adax.lon, adax.lat, ,]
adax.test.apcp.means <- colMeans(adax.test.apcp)
adax.test.dlwrf.means <- colMeans(adax.test.dlwrf)
adax.test.dswrf.means <- colMeans(adax.test.dswrf)
adax.test.pres_msl.means <- colMeans(adax.test.pres_msl)
adax.test.pwat.means <- colMeans(adax.test.pwat)
adax.test.spfh.means <- colMeans(adax.test.spfh)
adax.test.tcdc.means <- colMeans(adax.test.tcdc)
adax.test.tcolc.means <- colMeans(adax.test.tcolc)
adax.test.tmax.means <- colMeans(adax.test.tmax)
adax.test.tmin.means <- colMeans(adax.test.tmin)
adax.test.tmp2m.means <- colMeans(adax.test.tmp2m)
adax.test.tmpsfc.means <- colMeans(adax.test.tmpsfc)
adax.test.ulwrfsfc.means <- colMeans(adax.test.ulwrfsfc)
adax.test.ulwrfatm.means <- colMeans(adax.test.ulwrfatm)
adax.test.uswrf.means <- colMeans(adax.test.uswrf)
adax.final.test <- data.frame(
Date = names(adax.test.dlwrf.means),
apcp = adax.test.apcp.means,
dlwrf = adax.test.dlwrf.means,
dswrf = adax.test.dswrf.means,
pres_msl = adax.test.pres_msl.means,
pwat = adax.test.pwat.means,
spfh = adax.test.spfh.means,
tcdc = adax.test.tcdc.means,
tcolc = adax.test.tcolc.means,
tmax = adax.test.tmax.means,
tmin = adax.test.tmin.means,
tmp2m = adax.test.tmp2m.means,
tmpsfc = adax.test.tmpsfc.means,
ulwrfsfc = adax.test.ulwrfsfc.means,
ulwrfatm = adax.test.ulwrfatm.means,
uswrf = adax.test.uswrf.means
)
print(adax.final.test)
energy.adax.test <- energy_train[c("Date", "ADAX")]
colnames(energy.adax.test) <- c("Date", "ADAX")
# Hay que tener el mismo formato para combinarlo con nuestros predictores
energy.adax.test <- transform(energy.adax.test, Date=as.character(Date))
standarized.test <- data.frame(scale(adax.final.test[-1]))
standarized.test$Date <- adax.final.test$Date
print(standarized.test)
Y se combina con los predictores en un solo Dataframe para poder crear modelos.
energy.adax.test <- merge(energy.adax.test, standarized.test, by="Date")
sapply(energy.adax.test, class)
Date ADAX apcp dlwrf dswrf pres_msl pwat
"character" "integer" "numeric" "numeric" "numeric" "numeric" "numeric"
spfh tcdc tcolc tmax tmin tmp2m tmpsfc
"numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
ulwrfsfc ulwrfatm uswrf
"numeric" "numeric" "numeric"
predictions <- predict(lmm.fit, energy.adax.test[-1])
y <- energy.adax.test["ADAX"]
print(100*sum((y - predictions)**2) / sum((y - mean(y$ADAX))**2))
[1] 23.82584